@@ -42,6 +42,8 @@ module Agents |
||
42 | 42 |
Set `expected_update_period_in_days` to the maximum amount of time that you'd expect to pass between Events being created by this Agent. This is only used to set the "working" status. |
43 | 43 |
|
44 | 44 |
Set `uniqueness_look_back` to limit the number of events checked for uniqueness (typically for performance). This defaults to the larger of #{UNIQUENESS_LOOK_BACK} or #{UNIQUENESS_FACTOR}x the number of detected received results. |
45 |
+ |
|
46 |
+ Set `force_encoding` to an encoding name if the website does not return a Content-Type header with a proper charset. |
|
45 | 47 |
MD |
46 | 48 |
|
47 | 49 |
event_description do |
@@ -85,6 +87,19 @@ module Agents |
||
85 | 87 |
if options['uniqueness_look_back'].present? |
86 | 88 |
errors.add(:base, "Invalid uniqueness_look_back format") unless is_positive_integer?(options['uniqueness_look_back']) |
87 | 89 |
end |
90 |
+ |
|
91 |
+ if (encoding = options['force_encoding']).present? |
|
92 |
+ case encoding |
|
93 |
+ when String |
|
94 |
+ begin |
|
95 |
+ Encoding.find(encoding) |
|
96 |
+ rescue ArgumentError |
|
97 |
+ errors.add(:base, "Unknown encoding: #{encoding.inspect}") |
|
98 |
+ end |
|
99 |
+ else |
|
100 |
+ errors.add(:base, "force_encoding must be a string") |
|
101 |
+ end |
|
102 |
+ end |
|
88 | 103 |
end |
89 | 104 |
|
90 | 105 |
def check |
@@ -99,7 +114,11 @@ module Agents |
||
99 | 114 |
end |
100 | 115 |
|
101 | 116 |
request.on_success do |response| |
102 |
- doc = parse(response.body) |
|
117 |
+ body = response.body |
|
118 |
+ if (encoding = options['force_encoding']).present? |
|
119 |
+ body = body.encode(Encoding::UTF_8, encoding) |
|
120 |
+ end |
|
121 |
+ doc = parse(body) |
|
103 | 122 |
|
104 | 123 |
if extract_full_json? |
105 | 124 |
if store_payload!(previous_payloads(1), doc) |
@@ -32,7 +32,17 @@ describe Agents::WebsiteAgent do |
||
32 | 32 |
lambda { @checker.save! }.should raise_error; |
33 | 33 |
@checker.options = @site |
34 | 34 |
end |
35 |
- |
|
35 |
+ |
|
36 |
# Validation of the optional `force_encoding` setting: a known encoding name
# must save cleanly, anything else must make save! raise.
it "should validate the force_encoding option" do
  # A well-known encoding name is accepted.
  @checker.options['force_encoding'] = 'UTF-8'
  lambda { @checker.save! }.should_not raise_error

  # A non-string value, then a string that names no known encoding.
  [['UTF-8'], 'UTF-42'].each do |rejected|
    @checker.options['force_encoding'] = rejected
    lambda { @checker.save! }.should raise_error
  end

  # Put the original options back, as the preceding example does.
  @checker.options = @site
end
|
45 |
+ |
|
36 | 46 |
it "should check for changes (and update Event.expires_at)" do |
37 | 47 |
lambda { @checker.check }.should change { Event.count }.by(1) |
38 | 48 |
event = Event.last |
@@ -83,6 +93,62 @@ describe Agents::WebsiteAgent do |
||
83 | 93 |
end |
84 | 94 |
end |
85 | 95 |
|
96 |
# Exercises the `force_encoding` option against JSON bodies that were
# transcoded to EUC-JP before being served.
describe 'encoding' do
  # U+601D U+8003 -- non-ASCII text, so its EUC-JP and UTF-8 byte sequences
  # differ and a wrong decoding cannot round-trip to the same string.
  def huginn
    "\u{601d}\u{8003}"
  end

  # Stubs every request whose URL matches +url_pattern+ with a 200 response.
  # The body is `{ :value => huginn }.to_json` transcoded to EUC-JP, served
  # with the given Content-Type header value.
  def stub_euc_jp_json(url_pattern, content_type)
    stub_request(:any, url_pattern).to_return(
      :body => { :value => huginn }.to_json.encode(Encoding::EUC_JP),
      :headers => { 'Content-Type' => content_type },
      :status => 200
    )
  end

  # Builds and saves a JSON-type WebsiteAgent named +name+ that fetches +url+,
  # extracts the top-level `value` key and sets `force_encoding` to EUC-JP.
  # Returns the saved agent.
  def euc_jp_checker(name, url)
    site = {
      'name' => "Some JSON Response",
      'expected_update_period_in_days' => 2,
      'type' => "json",
      'url' => url,
      'mode' => 'on_change',
      'extract' => {
        'value' => { 'path' => 'value' },
      },
      'force_encoding' => 'EUC-JP',
    }
    checker = Agents::WebsiteAgent.new(:name => name, :options => site)
    checker.user = users(:bob)
    checker.save!
    checker
  end

  it 'should be forced with force_encoding option' do
    # The response declares no charset at all.
    stub_euc_jp_json(/no-encoding/, 'application/json')
    checker = euc_jp_checker("No Encoding Site", "http://no-encoding.example.com")

    # Make sure Event.last below is the event produced by this check,
    # not a leftover record.
    lambda { checker.check }.should change { Event.count }.by(1)
    Event.last.payload['value'].should == huginn
  end

  it 'should be overridden with force_encoding option' do
    # The response declares a charset (UTF-8) that contradicts the actual
    # EUC-JP bytes. The parameter needs the `charset=` name to be a
    # well-formed media-type parameter; a bare "; UTF-8" declares nothing.
    stub_euc_jp_json(/wrong-encoding/, 'application/json; charset=UTF-8')
    checker = euc_jp_checker("Wrong Encoding Site", "http://wrong-encoding.example.com")

    lambda { checker.check }.should change { Event.count }.by(1)
    Event.last.payload['value'].should == huginn
  end
end
|
151 |
+ |
|
86 | 152 |
describe '#working?' do |
87 | 153 |
it 'checks if events have been received within the expected receive period' do |
88 | 154 |
stubbed_time = Time.now |